from DrissionPage import ChromiumPage
import pyautogui
from tqdm import tqdm
from bs4 import BeautifulSoup
import pymysql
import re


def clean_text(text):
    """Return *text* with every disallowed character removed.

    A character is kept only if it is a CJK ideograph in the range
    U+4E00-U+9FA5, an ASCII letter or digit, whitespace, or one of the
    punctuation marks ``.,!?@#$%^&*()_+=-``. Everything else (quotes,
    emoji, full-width punctuation, ...) is dropped.
    """
    disallowed = r'[^\u4e00-\u9fa5a-zA-Z0-9\s.,!?@#$%^&*()_+=-]'
    return re.compile(disallowed).sub('', text)


# Brand names to search for. The ``__main__`` block appends the suffix
# '蛋白粉怎么样' to each active entry and passes the result to ``get_video``.
# Commented-out entries are not searched.
product_list = [
    '康比特',
    # '赛霸',
    # 'FoYes',
    # '训练怪兽',
    # 'ON',
    # '马泰时刻',
    # 'Marathontime Premium Line',
    # '诺特兰德',
    # '紫光优健',
    # '必第能量',
    # 'SIX STAR',
    # 'MUSCLETECH',
    # 'ALLMAX',
    # 'VIKING FORCE LABORATORIES',
    # '元气码头',
    # 'KEEP MOMENT',
    # "MUSCLE'SPEAK",
    'WILD FIELD HEALTH',
    '科派诺',
    '肌肉博士',
    '海德力',
    'MYPROTEIN'
]


# Reference screenshots matched against the screen while scrolling.
# Seeing the first one ends the scroll loop; seeing the second one makes
# get_video return without saving anything (presumably a "no results"
# page -- TODO confirm what nomore2.png depicts).
_END_OF_LIST_IMAGE = r'E:\pythonproject\douyingpinglun\picture\nomore.png'
_ABORT_IMAGE = r'E:\pythonproject\douyingpinglun\picture\nomore2.png'

# Locators for the search box and search button in the page header.
_SEARCH_BOX_XPATH = 'xpath://*[@id="douyin-header"]/div[1]/header/div[1]/div/div[1]/div/div[2]/div/div/input'
_SEARCH_BUTTON_XPATH = 'xpath://*[@id="douyin-header"]/div[1]/header/div[1]/div/div[1]/div/div[2]/div/button'


def _locate_on_screen(image_path):
    """Return the on-screen match for *image_path*, or None if there is none."""
    try:
        return pyautogui.locateOnScreen(image_path, confidence=0.8)
    except Exception:
        # Best effort: any lookup failure is treated as "not found". Unlike a
        # bare ``except``, this lets KeyboardInterrupt through so the caller's
        # scroll loop can still be stopped with Ctrl+C.
        return None


def _scroll_to_end(driver):
    """Scroll the page down until one of the two marker images is visible.

    Returns True when the end-of-list marker is seen and False when the
    abort marker is seen.
    """
    while True:
        driver.scroll.to_bottom()
        driver.wait(0.5)
        if _locate_on_screen(_END_OF_LIST_IMAGE):
            return True
        if _locate_on_screen(_ABORT_IMAGE):
            return False


def _parse_video_card(html_content):
    """Extract ``(link, name, author, publish_time)`` from one result card.

    Returns None when any of the expected elements is missing, so the caller
    can skip that card.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    link_tag = soup.find('a', class_='hY8lWHgA _4furHfW')
    name_tag = soup.find('div', class_='VDYK8Xd7')
    author_tag = soup.find('span', class_='MZNczJmS')
    time_tag = soup.find('span', class_='faDtinfi')
    if link_tag is None or name_tag is None or author_tag is None or time_tag is None:
        return None

    video_link = link_tag.get('href')
    if not video_link:
        return None
    # Links without a scheme get 'https:' prepended.
    if 'https:' not in video_link:
        video_link = 'https:' + video_link
    return video_link, name_tag.text, author_tag.text, time_tag.text


def get_video(keyword):
    """Search Douyin videos for *keyword* and store each result in MySQL.

    Drives a Chromium page: opens the video search page, submits *keyword*,
    picks the first option of the filter menu, scrolls until a marker image
    is detected on screen, then parses every result card and inserts one
    row per video into the ``抖音视频表`` table.

    Args:
        keyword: Search phrase typed into the search box; also stored with
            every inserted row.

    Returns:
        None. Returns early, before touching the database, when the abort
        marker image is detected while scrolling.
    """
    driver = ChromiumPage()
    driver.set.window.max()
    driver.get('https://www.douyin.com/search/type%3Dvideo?type=video')
    driver.wait(2)
    driver.ele(_SEARCH_BOX_XPATH).clear()
    driver.ele(_SEARCH_BOX_XPATH).input(keyword)
    driver.wait(1)
    driver.ele(_SEARCH_BUTTON_XPATH).click()
    driver.wait(2)
    # Hover the filter control to open its menu.
    driver.ele('.QfeM8ow3').hover()
    driver.wait(1)
    # First menu option ("latest published" per the original comment).
    driver.eles('.eXMmo3JR')[0].click()
    driver.wait(2)
    # Click the screen corner, moving the pointer off the filter menu.
    pyautogui.click(1, 1)

    if not _scroll_to_end(driver):
        return None

    video_list = driver.eles('.SwZLHMKk SEbmeLLH')
    print(f'共有{len(video_list)}个视频')

    # NOTE(review): credentials are hard-coded in source; they should be moved
    # to configuration / environment and rotated.
    DB_CONFIG = {
        'host': 'rm-2zea30h4sh8g15zd1ho.mysql.rds.aliyuncs.com',
        'port': 3306,
        'user': "root",
        'password': 'Ds2024@()833429',
        'database': "douyinpinglun"
    }
    # Parameterized statement: scraped text and the keyword are passed as
    # values, never spliced into the SQL string.
    insert_sql = (
        'INSERT INTO 抖音视频表 (视频名称, 视频作者, 视频发布时间, 视频链接, 搜索关键词) '
        'VALUES (%s, %s, %s, %s, %s)'
    )

    mydb = pymysql.connect(**DB_CONFIG)
    try:
        with mydb.cursor() as cursor:
            print('数据库连接成功')
            for video in tqdm(video_list):
                parsed = _parse_video_card(video.html)
                if parsed is None:
                    print('skipping result card with missing fields')
                    continue
                video_link, video_name, video_author, video_time = parsed

                print(f"视频链接:{video_link}")
                print(f"视频名称:{video_name}")
                print(f"视频作者:{video_author}")
                print(f"视频发布时间:{video_time}")
                video_author = clean_text(video_author)
                video_name = clean_text(video_name)

                try:
                    cursor.execute(
                        insert_sql,
                        (video_name, video_author, video_time, video_link, keyword),
                    )
                    mydb.commit()
                except pymysql.MySQLError as exc:
                    # Keep going on a failed row, but report it rather than
                    # hiding it.
                    print(f'insert failed for {video_link}: {exc}')
                driver.wait(0.05)
    finally:
        # Always release the connection, even if parsing or inserting raised.
        mydb.close()


if __name__ == '__main__':
    # Run one search per brand, using "<brand>蛋白粉怎么样" as the search phrase.
    for brand in product_list:
        get_video(f'{brand}蛋白粉怎么样')
